
** Builds the dataset that mimics the Deryugina format and instrument (matches weather and pollution data within zip, clusters monitors for the wind instruments)

***Inputs
* $Data/GameUser_pollution_zd_inzip.dta
* $Data/GameUserweighted_ISDweather_zd.dta
* $Data/GameUserData.dta 
* $Data/AQS_clusters_inzip.dta

***Outputs
* $Data/ForFullIV_cluster_inzip.csv


** Load the zip-by-date pollution file; it is the master for both merges below.
* Paths are quoted so that a data directory containing spaces still resolves.
use "$Data/GameUser_pollution_zd_inzip.dta", clear

** match pollution to weather
* 1:1 on zip and date. Unmatched rows from either side are kept at this point.
merge 1:1 zip date using "$Data/GameUserweighted_ISDweather_zd.dta", nogenerate

** match pollution and weather to games
* 1:m on zip and date: one zip-date row can match many rows of the game file.
* Master-only rows (a zip-date with no row in the game file) are dropped here,
* because they hold missing values for every variable that exists only in the
* game file. Using-only rows are kept, exactly as before.
merge 1:m zip date using "$Data/GameUserData.dta", keep(match using) nogenerate


	* Sample restriction: keep rows with non-missing weighted_temperature and
	* dailypm25, and with at least one of median_closest or weighted_calm non-missing.
	* NOTE(review): median_closest is left as written; it looks like an
	* abbreviation of median_closestwind, which is used later - confirm.
	keep if !missing(weighted_temperature, dailypm25) & (median_closest < . | weighted_calm < .)

	* Keep one row per anon_id and time_stamp: the row with the lowest nth_play.
	* Rows tied on nth_play within a group are picked arbitrarily by the sort.
	bysort anon_id time_stamp (nth_play): keep if _n == 1

	* Declare the panel structure: anon_id is the panel id, time_stamp the time variable.
	xtset anon_id time_stamp
	
** normalize game scores
* Within each value of gamenum, on the rows currently in memory:
*   norm_score = (score_raw - mean) / sd
*   std_score  = score_raw / sd
* A game whose sd is zero or missing gets missing values in both variables.

	* Let levelsof fill the macro itself. Copying r(levels) with an = assignment
	* sends the list through the expression evaluator, which can truncate a
	* long list on older Stata releases.
	levelsof gamenum, local(gamelist)

	gen norm_score = .
	gen std_score = .

	foreach g of local gamelist {

		* per-game mean and standard deviation of the raw score
		summarize score_raw if gamenum == `g'
		local mu = r(mean)
		local sigma = r(sd)

		replace norm_score = ((score_raw - `mu')/`sigma') if gamenum == `g'
		replace std_score = (score_raw/`sigma') if gamenum == `g'

	}

	label variable norm_score "Normalized score"
	label variable std_score "Standardized score"
	
	
	
** generate variables
* Calendar variables, PM indicator dummies, temperature bins, and monitor clusters.
* Variables are created in the original order so the column order is unchanged.

	* month of the year and Stata monthly date, both derived from date
	gen month = month(date)
	gen monthyear = mofd(date)

	* PM threshold dummies (above 25, above 12); a missing dailypm is coded 0
	* NOTE(review): dailypm is presumably an abbreviation of dailypm25 - confirm
	gen highpm = !missing(dailypm) & dailypm > 25
	gen modpm = !missing(dailypm) & dailypm > 12

	* 10-unit temperature bins from -40 to 40, coded by each bin's lower bound;
	* values below -40 or at or above 40 are coded missing by egen cut
	egen cut_temp = cut(weighted_temperature), at(-40(10)40)

	* import monitor clusters for instrument; only zips found in both files are kept
	merge m:1 zip using $Data/AQS_clusters_inzip.dta
	drop if _merge != 3
	drop _merge
	
	

	
	
	* Bin median_closestwind into 60-unit sectors (presumably degrees - confirm),
	* coded by each sector's lower bound: 0, 60, 120, 180, 240, 300.
	* egen cut codes a value equal to the top break as missing, so a reading of
	* exactly 360 is folded into sector 0 by hand.
	egen cut_winddirection = cut(median_closestwind), at(0(60)360)
	replace cut_winddirection = 0 if median_closestwind == 360

	* List every sector except 0, which serves as the omitted base sector.
	* Missing sectors are excluded explicitly: in Stata missing compares as
	* greater than any number, and the original relied on levelsof skipping it.
	* The macro is filled by levelsof directly rather than through an = assignment.
	levelsof cut_winddirection if cut_winddirection > 0 & !missing(cut_winddirection), local(winds)

	* One instrument per non-base sector: equal to monitorcluster when the wind
	* falls in that sector and 0 otherwise, including when the sector is missing.
	foreach w of local winds {
		gen clusterfromb60`w'dir = cond(cut_winddirection == `w', monitorcluster, 0)
	}
		
		
	

	* Keep only the columns written to the output CSV.
	* cut_winddirection is spelled out in full so the keep does not depend on
	* variable abbreviation being switched on.
	* NOTE(review): dailypm is left as written; it is presumably an abbreviation
	* of dailypm25 - confirm, and spell it out if so.
	keep norm_score highpm modpm weighted* anon_id monthyear cut_temp zip* *from* dailypm cut_winddirection

	* Write the dataset, overwriting any existing file. The path is quoted so a
	* data directory containing spaces still resolves.
	export delimited using "$Data/ForFullIV_cluster_inzip.csv", replace

	
	
